# install.packages("pacman")
pacman::p_load(
here,
tidyverse,
# Rcan, # package for Cancer Registry Data Analysis and Visualisation
PHEindicatormethods, # package for standardising rates
DT # package for table formatting and styling
)
# source(here("code", "01_prep.R"))
# Most recent data, from CI5-XII
swiss_XII <- readRDS(here("data", "generated_data", "swiss_XII.rds"))
# Annual data, from CI5-I to CI5-XII
swiss_plus <- readRDS(here("data", "generated_data", "swiss_plus.rds"))
# Canton polygons data
canton_boundaries <- readRDS(here("data", "generated_data", "canton_boundaries.rds"))R coding to calculate standardised rates and ratios from cancer registry data
Code
/* set DT table fontsizes */
th { font-size: 11px; } /* header font */
td { font-size: 11px; } /* cell font */CI5 XII
Directly standardised rates
This section uses data from Volume XII of the IARC’s “Cancer Incidence in Five Continents (CI5)” collaboration: https://ci5.iarc.fr/ci5-xii/download. The full dataset was then restricted to data from Switzerland-based cancer registries only.
World and European Standard Populations were downloaded from: https://www.opendata.nhs.scot/dataset/standard-populations
When using the calculate_dsr() function from the PHEindicatormethods package, when the cases within the strata are less than 10 the DSR is suppressed (“When the total count is < 10 DSRs are not reliable and will therefore be suppressed in the output”).
Using European Standard Population
Code
# "When the total count is < 10 DSRs are not reliable and will therefore be suppressed in the output." (from the `PHEindicatormethods::calculate_dsr()` funtion)
## European Standard Population
swiss_XII |>
# filter(
# cancer_label %in% c("Colon")
# , id_region %in% c("Geneva", "Vaud")
# ) |>
# Add grouping variables of interest
group_by(period, cancer_label, id_region, sex) |>
PHEindicatormethods::calculate_dsr(
x = cases, # field containing count of events
n = py, # field containing population denominators
stdpop = EuropeanStandardPopulation, # field containing standard populations
type = "standard"
) |>
mutate(crude_rate = total_count/total_pop*100000) |>
relocate(crude_rate, .before = value) |>
rename(cases = total_count, ASR = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(crude_rate:upper_CI), ~ round(.x, 2))) |>
DT::datatable(
filter = "top",
options = list(
pageLength = 24
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Using World Standard Population
Code
swiss_XII |>
# filter(
# cancer_label %in% c("Colon", "Corpus uteri", "Prostate"),
# id_region %in% c("Geneva")
# ) |>
group_by(period, cancer_label, id_region, sex) |>
PHEindicatormethods::calculate_dsr(
x = cases, # field containing count of events
n = py, # field containing population denominators
stdpop = WorldStandardPopulation, # field containing standard populations
type = "standard"
)|>
mutate(crude_rate = total_count/total_pop*100000) |>
relocate(crude_rate, .before = value) |>
rename(cases = total_count, ASR = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(crude_rate:upper_CI), ~ round(.x, 2))) |>
DT::datatable(
filter = "top",
options = list(
pageLength = 24
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)By age (crude rates only)
Sex: Male
Code
## World Standard Population
swiss_XII |>
filter(
# cancer_label %in% c("Colon", "Corpus uteri", "Prostate"),
# id_region %in% c("Geneva"),
sex == "Male"
) |>
group_by(cancer_label, id_region, age_WSP) |>
summarise(crude_rate = cases/py*100000, across()) |>
select(cancer_label:crude_rate, cases, py, period) |>
mutate(crude_rate = round(crude_rate, 2)) |>
# Format it as a table
DT::datatable(
filter = "top",
options = list(
pageLength = 18
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Sex: Female
Code
## World Standard Population
swiss_XII |>
filter(
# cancer_label %in% c("Colon", "Corpus uteri", "Prostate"),
# id_region %in% c("Geneva"),
sex == "Female"
) |>
group_by(cancer_label, id_region, age_WSP) |>
summarise(crude_rate = cases/py*100000, across()) |>
select(cancer_label:crude_rate, cases, py, period) |>
mutate(crude_rate = round(crude_rate, 2)) |>
# Format it as a table
DT::datatable(
filter = "top",
options = list(
pageLength = 18
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Indirect standardisation
I first aggregated the data to the level of the whole of Switzerland, per cancer type and summing the cases and person-years by age and sex. This reference population was then used to calculate indirectly standardised rates and ratios for each canton for each cancer type, by sex.
Code
# Make "reference" population for Switzerland with total cases and person-years
# per cancer type and by age and sex
swiss_ref_XII <-
swiss_XII |>
group_by(id_country, cancer_label, sex, age_ESP) |>
reframe(cases_ref = sum(cases), py_ref = sum(py)) |>
ungroup() |>
right_join(swiss_XII, join_by(id_country, cancer_label, sex, age_ESP))Indirectly standardised rates
Code
ind.rate <-
swiss_ref_XII |>
group_by(cancer_label, sex, id_region) |>
PHEindicatormethods::calculate_ISRate(
x = cases,
n = py,
x_ref = cases_ref,
n_ref = py_ref,
refpoptype = "field"
) |>
rename(ISRate = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(expected:upper_CI), ~ round(.x, 2)))
# Generate the table
ind.rate |>
select(-c(confidence:method)) |>
DT::datatable(
caption = "Indirectly standardised rate per 100,000, with 95% CI",
filter = "top",
options = list(
pageLength = 13
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Indirectly standardised ratios
Code
ind.ratio <-
swiss_ref_XII |>
group_by(cancer_label, sex, id_region) |>
PHEindicatormethods::calculate_ISRatio(
x = cases,
n = py,
x_ref = cases_ref,
n_ref = py_ref,
refpoptype = "field"
) |>
rename(ISRatio = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(expected:upper_CI), ~ round(.x, 2)))
# Generate the table
ind.ratio |>
select(-c(confidence:method)) |>
DT::datatable(
caption = "indirectly standardised ratio x 1, with 95% CI",
filter = "top",
options = list(
pageLength = 13
),
rownames = FALSE, # set to FALSE for cleaner look
class = 'cell-border stripe hover nowrap compact'
)Forest plot of ratios
(for a select few cancer types)
Code
ind.ratio |>
filter(cancer_label %in%
c("All sites", "Colon", "Rectum",
"Bladder", "Prostate", "Breast",
"Bone", "Vulva", "Lung (incl. trachea and bronchus)"
)) |>
mutate(est_ci_label = sprintf("%.2f [%.2f-%.2f]",
ISRatio, lower_CI, upper_CI)
) |>
# arrange(id_region, sex) |>
# Start the plot
ggplot(aes(x = ISRatio, y = id_region, color = sex))+
# Add CI
geom_errorbarh(aes(xmin = lower_CI, xmax = upper_CI),
height = 0.4,
linewidth = 0.5,
position = position_dodge(width = 0.5)
)+
# Add point estimate
geom_point(size = 1.5,
position = position_dodge(width = 0.5))+
# Add vertical reference line at ISRatio = 1
geom_vline(xintercept = 1, linetype = "dashed", color = "grey50")+
# Separate plots by cancer type
facet_wrap(~ cancer_label,
labeller = label_wrap_gen(),
ncol = 3
# scales = "free_x" # Use free_y so regions aren't duplicated across facets
)+
# Add labels and title
labs(
title = "Indirectly Standardized Ratio by Canton and Sex",
subtitle = "(reference: national data for Switzerland)",
x = "IS Ratio (95% CI)",
y = NULL,
color = NULL # Legend title
) +
# Use a clean theme
theme_bw(base_size = 12) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
# strip.text = element_text(face = "bold"), # Make facet titles bold
axis.text.y = element_text(size = 12), # Adjust y-axis text size if needed
# panel.spacing.y = unit(1, "lines"), # Add space between facets
legend.position = "top" # Position legend
)CI5 I-XII: Geneva (annual trends)
This section uses annual data from Volumes I to XII of the IARC’s “Cancer Incidence in Five Continents (CI5)” collaboration: https://ci5.iarc.fr/ci5plus/download. The full dataset was then restricted to data from Switzerland-based cancer registries only.
Using European Standard Population
Code
# Prep data
annual_trends_ESP <- swiss_plus |>
filter(!cancer_label %in% c("All cancers excluding non-melanoma skin")) |>
# filter(cancer_label %in% c(
# "Prostate", "Lung (incl. trachea)",
# "Bladder", "Melanoma of skin", "Colon",
# "Rectum and anus", "Breast", "Corpus uteri",
# "Pancreas", "Ovary")
# ) |>
filter(id_region %in% c("Geneva")) |>
# Add grouping variables of interest
group_by(year, cancer_label, id_region, sex) |>
PHEindicatormethods::calculate_dsr(
x = cases, # field containing count of events
n = py, # field containing population denominators
stdpop = EuropeanStandardPopulation, # field containing standard populations
type = "standard"
) |>
mutate(crude_rate = total_count/total_pop*100000) |>
relocate(crude_rate, .before = value) |>
rename(cases = total_count, ASR = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(crude_rate:upper_CI), ~ round(.x, 2))) |>
ungroup()
# Plot
annual_trends_ESP |>
# mutate(sex = as.character(sex)) |>
mutate(year = ymd(paste0(year, "-01-01"))) |>
ggplot(aes(x = year, y = ASR, color = sex, fill = sex))+
geom_ribbon(
aes(ymin = lower_CI, ymax = upper_CI),
color = NA,
alpha =0.2)+
geom_line()+
facet_wrap(.~cancer_label, labeller = label_wrap_gen())+
theme_bw()+
theme(
legend.position = "top",
axis.text.x = element_text(
angle = 40, vjust = 0.5
# , size = 12
))Using World Standard Population
Code
# Prep data
annual_trends_WSP <- swiss_plus |>
filter(!cancer_label %in% c("All cancers excluding non-melanoma skin")) |>
# filter(cancer_label %in% c(
# "Prostate", "Lung (incl. trachea)",
# "Bladder", "Melanoma of skin", "Colon",
# "Rectum and anus", "Breast", "Corpus uteri",
# "Pancreas", "Ovary")
# ) |>
filter(id_region %in% c("Geneva")) |>
# Add grouping variables of interest
group_by(year, cancer_label, id_region, sex) |>
PHEindicatormethods::calculate_dsr(
x = cases, # field containing count of events
n = py, # field containing population denominators
stdpop = WorldStandardPopulation, # field containing standard populations
type = "standard"
) |>
mutate(crude_rate = total_count/total_pop*100000) |>
relocate(crude_rate, .before = value) |>
rename(cases = total_count, ASR = value, lower_CI = lowercl, upper_CI = uppercl) |>
mutate(across(c(crude_rate:upper_CI), ~ round(.x, 2))) |>
ungroup()
# Plot
annual_trends_WSP |>
mutate(year = ymd(paste0(year, "-01-01"))) |>
ggplot(aes(x = year, y = ASR, fill = sex, color = sex))+
geom_ribbon(
aes(ymin = lower_CI, ymax = upper_CI),
color = NA,
alpha =0.2
)+
geom_line()+
facet_wrap(.~cancer_label, labeller = label_wrap_gen())+
theme_bw()+
theme(
legend.position = "top",
axis.text.x = element_text(
angle = 40, vjust = 0.5
# , size = 12
))